{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 训练\n",
    "\n",
    "我们对训练集采用随机森林模型，并评估模型效果"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Populating the interactive namespace from numpy and matplotlib\n"
     ]
    }
   ],
   "source": [
    "%pylab inline\n",
    "# 导入训练集、验证集和测试集\n",
    "\n",
    "import pandas as pd\n",
    "\n",
    "samtrain = pd.read_csv('samtrain.csv')\n",
    "samval = pd.read_csv('samval.csv')\n",
    "samtest = pd.read_csv('samtest.csv')\n",
    "\n",
    "# 使用 sklearn的随机森林模型，其模块叫做 sklearn.ensemble.RandomForestClassifier\n",
    "\n",
    "# 在这里我们需要将标签列 ('activity') 转换为整数表示，\n",
    "# 因为Python的RandomForest package需要这样的格式。  \n",
    "\n",
    "# 其对应关系如下：\n",
    "# laying = 1, sitting = 2, standing = 3, walk = 4, walkup = 5, walkdown = 6\n",
    "\n",
    "import randomforests as rf\n",
    "samtrain = rf.remap_col(samtrain,'activity')\n",
    "samval = rf.remap_col(samval,'activity')\n",
    "samtest = rf.remap_col(samtest,'activity')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sklearn.ensemble as sk\n",
    "rfc = sk.RandomForestClassifier(n_estimators=500, oob_score=True)\n",
    "train_data = samtrain[samtrain.columns[1:-2]]\n",
    "train_truth = samtrain['activity']\n",
    "model = rfc.fit(train_data, train_truth)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.98174904942965779"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 使用 OOB (out of band) 来对模型的精确度进行评估.\n",
    "rfc.oob_score_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[(0.048788075395111638, 'tAccMean'),\n",
       " (0.044887862923922571, 'tAccStd'),\n",
       " (0.044231502495174914, 'tJerkMean'),\n",
       " (0.04892499919665521, 'tGyroJerkMagSD'),\n",
       " (0.058161561399143025, 'fAccMean'),\n",
       " (0.0448666616780896, 'fJerkSD'),\n",
       " (0.14045995765086935, 'angleGyroJerkGravity'),\n",
       " (0.16538335816293095, 'angleXGravity'),\n",
       " (0.047154808012715918, 'angleYGravity')]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 用 \"feature importance\" 得分来看最重要的10个特征\n",
    "fi = enumerate(rfc.feature_importances_)\n",
    "cols = samtrain.columns\n",
    "[(value,cols[i]) for (i,value) in fi if value > 0.04]\n",
    "## 这个值0.4是我们通过经验选取的，它恰好能够提供10个最好的特征。\n",
    "## 改变这个值的大小可以得到不同数量的特征。\n",
    "## 下面这句命令是防止你修改参数弄乱了后回不来的命令备份。\n",
    "## [(value,cols[i]) for (i,value) in fi if value > 0.04]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "我们对验证集和测试集使用predict()方法，并得到相应的误差。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 因为pandas的 data frame 在第0列增加了一个假的未知列，所以我们从第1列开始。\n",
    "# not using subject column, activity ie target is in last columns hence -2 i.e dropping last 2 cols\n",
    "\n",
    "val_data = samval[samval.columns[1:-2]]\n",
    "val_truth = samval['activity']\n",
    "val_pred = rfc.predict(val_data)\n",
    "\n",
    "test_data = samtest[samtest.columns[1:-2]]\n",
    "test_truth = samtest['activity']\n",
    "test_pred = rfc.predict(test_data)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 输出误差 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "mean accuracy score for validation set = 0.834911\n",
      "mean accuracy score for test set = 0.900337\n"
     ]
    }
   ],
   "source": [
    "print(\"mean accuracy score for validation set = %f\" %(rfc.score(val_data, val_truth)))\n",
    "print(\"mean accuracy score for test set = %f\" %(rfc.score(test_data, test_truth)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[293,   0,   0,   0,   0,   0],\n",
       "       [  0, 224,  40,   0,   0,   0],\n",
       "       [  0,  29, 254,   0,   0,   0],\n",
       "       [  0,   0,   0, 197,  26,   6],\n",
       "       [  0,   0,  16,   1, 173,  26],\n",
       "       [  0,   0,   0,   3,  14, 183]])"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 使用混淆矩阵来观察哪些活动被错误分类了。\n",
    "# 详细说明请看 [5]\n",
    "import sklearn.metrics as skm\n",
    "test_cm = skm.confusion_matrix(test_truth,test_pred)\n",
    "test_cm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 混淆矩阵可视化"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAPgAAAD0CAYAAAC2E+twAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAGmVJREFUeJzt3Xu4JFV57/HvbwYYISggKMoAMyCSAOcokIgHiTDEIzJK\nMDEmAj4nCImeE+KBR2KCoHmGMd4wEaIIJ4mQeURFRHwQSVAGAhsDclMYGS4DCAwMIzNyvzhhmMt7\n/lhrQ++mL7W7+lK79u/zPPXs7urqWqt399tr1arqdykiMLN6mjHqCpjZ4DjAzWrMAW5WYw5wsxpz\ngJvVmAPcrMYqH+CSXiHpUklPSfpOif0cJelH/azbqEj6XUl39fjc3SXdKulpSR/td92GSdIcSRsl\nVf5zPCp9+8fkALpZ0rOSVkr6d0kH9GHX7wdeA2wTER/odScRcX5EHNqH+gxU/sDu2mmbiLg2Ivbo\nsYi/Aa6KiK0i4qs97uNFkhZIOq/sfvK+ur72FgpdyCHpIEkreqjWlNaXAJd0InA68BngtcDOwFnA\n7/dh93OAe2L6XJHT8XVKmlly/3OAO3p5Yh/K7maQ77H6vf+tpVDxZXk/yy4sIkotwKuAZ4H3ddhm\nM+AfgZXAw8AZwKb5sYOAFcCJwOq8zdH5sVOBtcALwDPAMcAC4BsN+54DbARm5PsfAu7L298HHJnX\nHw38Z8Pz3gbcBDwJ3Ajs3/DY1cCngWvzfn4EvLrNaxuv/1831P+9wHzgbuAx4OSG7d8C/CSXuxI4\nE9gkP3ZNfi3P5XL/uGH/fwM8Anx9fF1+zq7A48De+f4OwK+AA1vU9T+A9cB/5f3vlt+/8/JzHgA+\n2bD90fl/cHp+HZ9u2t+78vuzNn8Gbm34TJwD/DLX/e8A5cfeAIwBT+Uyv93utbeo/wzgH4BHgV8A\nxwEbmt77O/PzfwF8JK/fAliTX/uz+fHXdXovCn724zMFFyDKxlpP8dmHAH8XKQBndNjm0/kfuW1e\nrgMWNgTIOlLgzsyB8Wtgq/z4AuC8hn01358z/ibnN/JpYLf82PbAHg0f1h/n29sATwBH5ecdke9v\n0xDg9+YP46x8/3MdAnwd8Mlc/z/PH9xv5vrsmT9cc/L2+wL7kVqUnUmt6fEN+9sI7NJi/58DNs31\nOQh4qGGbPwNuBzYHLgdO6/BeXA0c23D/PODiXNc5pC+lYxr+Z+tyIM0AZrXY34T3I6+7GDgbeAWw\nHXAD8OH82PnkLzzSF//b2r32FmX9H1IA7wBsDVzFxACfD8zNt9+eP0d7N/wfH2raX8f3okiAn1Zw\nYUQB3o8u+rbAYxGxscM2R5EC+vGIeBxYCPyvhsdfAP4uIjZExA9J3+K/2WN9NgD/XdIrImJ1RLQa\njHoPqdt/fkRsjIgLgGVMPKRYFBH3RcRa4EJg7w5lvkD6AtgAXED6UP9jRKyJiDtJH8o3A0TELRFx\nUyQPAf9C+vA1UovXtCAi1uX6TBAR55JarBtJX2qf6lDXlwpJg1MfAD6R6/og8CUmvjcrI+Ls/H96\nWdkt9vlaUqB9LCKej4jHSL23I/Im64A5kmZHxAsR8ZPmXXTY/R+T/q+/jIingM83PhgRP4yI5fn2\nfwKLSYHeUsH3oqNNCi6j0o8AfxzYrstI5g7AQw33H8zrXtxH0xfEGmDLyVYkItaQPrB/ATySR99b\nfVHskOvQ6EFgdsP9VZOoz+ORv9JJ3V9IrTgN67YEkPTGXK9HJD0FfJb0hdDJoxGxrss25wB7AWcW\n2HbcdqTPX/N70/h/mOzA1BxST+MRSU9IehL4J9JAKaRDmRnATZKWSjpmEvveoak+E95DSfMlXS/p\n8VzufDr8b3t8LybYvOAyKv0I8OtJx2B/0GGblaQ3ftwc0vFZL35N6k6Oe33jgxFxRUQcQjrGupv0\nrdzsl8DcpnU753oO2v8D7gLeEBFbk7r2nVot6D7w9hukVvJc4FRJWxesy2PkFrVh3Rwm/h+6DUw1\nP74CeB7YNiJeHRHbRMTWEfEmgIj4VUR8JCJmk7rcZ09i5PwRYKemugIgaTPgIuCLwGsiYhvgh7z0\nv231Onp5LybYtOAyKqUDPCKeIR2HnSXpvZI2l7RJ/jb9Qt7sAuBTkraTtB3wt8A3eixyCXCgpJ0k\nbQV8YvwBSa+VdLikLUgf3OdIx3XNLgPeKOkISTMlfQDYA7i0xzpNxiuBZyJijaTfIvU2Gq0iDZxN\nxleAmyLiI6TX9s9FnpR7TRcCn5W0paQ5wMeY3HuzGpgrSXmfq0hd4zMkvVLJrpIOBJD0fknjPYSn\nSO/P+HvU7bVfCBwvabakbYCTGh7bLC+PRcRGSfOBQ5rqua2kVzWs6/ZedDUduuhExOmkUfBPkbqm\nD5EGZr6fN/kM8FPgNuDn+fZnO+2yQ1lXAt/J+7qZiUE5I9djJal1OpAWb1pEPAEcBnw8b/dx4D0R\n8WS38gtqfn7j/Y8DH5T0DCkQL2ja9lTgvNy9fX+3giQdTvogH5dXnQjsI+nIgnU7nnQIcj/wY+Cb\nEbGoW7kNvktq9R6X9NO87mhSsN1JGrz8LqlHBWnk+sb8+r9PGtRanh87lc6v/WukQcTxz9D3XnxR\nEc/l1/JdSU+QjvkvaXj8buDbwP15/6+j+3vRVdVbcL106DiCwqVDSV3LGcC5EXHaEMo8lxTcq8e7\njcMgaUfSiPX2pBbraxHxlQGXOYsUtJuRGpKLImLhIMtsKn8GKRAfjojDh1TmctKZlI3AuojYb4Bl\nRdFvhCOAiJhU978fRnaJX37zv0o6zbYXcGTuJg3aolzmsK0HToyIvYD9gb8c9OvNo94HR8Q+pLMA\n8yUN7APfwgmkVnyYNgLzImKfQQb3uKq34KO8hnc/4N6IeDCP+l5AukBkoCLiWtKFDUMVEasiYkm+\n/RxpcGd252f1pdw1+eYsUis+lC5b7rG8mzS6P0xiiJ9rB3h7s5l4yuNhhvCBrwJJc0kt6o1DKGuG\npFtJA1hXRMTNgy4zO4N0SmzYx4ABXJF/F/HhQRc2HU6T2SRI2pJ0OueE3JIPVL5AZR9gR+CtkvYc\ndJmS3kMa41hCalGHeex5QETsS+o9/KWk3x1kYWVG0SXtKOkqSXfkawL+b15/gaRb8vKApFsannOy\npHsl3SXpkDa7nlC/UVlJOvc8bkeGcx56ZCRtQgrub0TEJd2276eIeEbS1cChDP64+ADgcEnvJjVg\nr5R0XkT86YDLJSIeyX8flXQx6VDw2kGVV7L7PT4usyR/8f9M0hURMX7VH5L+gXQ6EUl7AH9COqW7\nI3ClpDc2XGT1MqNswW8GdlP6Te9mpIHGHwyp7GG3KuP+FbgzIr48jMLydQdb5dubA+8kXZI7UBFx\nSkTsHBG7kt7Xq4YR3JK2yIEyfvHPIaRr9AemTAtecFzmT0jX70Mao7ogItbnU4v3kr7A2hpZgOfr\ntj9KuijiDlLFe0piMBmSzif98GV3SQ9N8lLJMuUeAHwQ+D2lhAu35NOEg/R64GpJS0jH+5dHxGUD\nLnOUtgeuzWMONwCXRsTiQRbYr0G2VuMykt4OrIqI+/Oq5nGrlXQZtxplF52I+BG9/6ik1zKPGmZ5\nDeVeR/q12TDLXEr6xdTIRMQ1pJ+CDqOsB+j8o6C+60cAdRiXOZJ0cU7PRhrgZlNdu9b5prx0025c\nRim5xvuY+AW9konX4ncdtxrplWxmU5mkeKDgtrvQ+ko2pXRXj0XEiU3rDwVOioiDG9btCXwLeCup\na34F0HGQzS24WQllRtEbxmWW5nGDAE7Jh64foKl7HhF3SrqQdBZkHXBcp+AGt+BmPZMUq7pvBqRf\n2oziWnS34GYlbFo0gtYPtBpt9S3AJbkrYLUwmZZ2k+kS4JCyPkzWGDCvRJkLeyq1HyX3ajqVO4oy\ny5Y7uV/TbjrUE5+T5y66WQmFW/ARqXj1zKpt01mjrkFnIw/wudOu5OlU7ijKHHK5I4+gzkZevbnT\nruTpVO4oyhxyuSOPoM4qXj2ziqt4BFW8emYV51F0sxqreARVvHpmFVfxUfRCCR8kHSppmaR7JJ3U\n/Rlm00TFpzbpWnRD/vJ3kOb0ulnSJREx8NQ/ZpVX8T5wkRZ8JPnLzaaEmQWXESkS4NM2f7lZVyW6\n6C3SJh/f9PhfSdoo6dUN66ZM2mSzqa9cBLVKm7w4IpblmWHeScMc6L2kTS5SvcL5y8cabs9llFep\nmRW1PC89KhHgearlVfn2c5LG0yYv46WZYRpTib+YNhlYLmk8bXLbGXKKVO/F/OWkCdiPIGV7fJl5\nBXZmVi1zmdgUTTIBbJ9OkzWmTc5TQq+IiKXShJ+mzwaub7hfPm1yRGyQNJ6/fHya34HnLzebEvpw\nkNuYNhnYAJxC6p6XVqh6o8hfbjYltBkhH/sVjD3a/enNaZMl/TdSl+LnSs33jsAtedrnSU/35UE2\nszLaRNC8HdIybmH72eAmTGcVEbeTcjQCIOkBYN+IeFLSD4BvSTqd1DXfjS7p1x3gZmWUiKAuaZPH\nBXkevV7SJjvAzcoocRFLkems8gSOjfc/D3y+aBkOcLMyKh5BFa+eWcW9YtQV6MwBblaGEz6Y1VjF\nI6ji1TOruIpHUMWrZ1Zx7qKb1VjFI6iv1et9nrDexWmTm0uqX7RgBHMtPn/q8Mu0zqZTgJtNOxVP\nuugANyuj4hFU8eqZVVzFI6ji1TOrOI+im9VYxSOo4tUzq7iKR1ChmU3MrI0SedHbpU2W9H5Jt0va\nIGnfpuc4bbLZ0JT7NVnLtMnAUuAPgX9u3HhQaZPNrJ0BpE2OiP8AUFNKVXpIm+wuulkZfZq6qDFt\ncofNmmcZ6po2uWuASzpX0mpJt3Wvptk004fZRRvTJkfEc/2uXjeLgDOB8/pZsFkttImgsdvS0k1z\n2uQum68Edmq4Xz5tckRcm2c1MbNmbbrf8/ZJy7iF57fdw4S0yS00HoePp00+A6dNNhuCEqPo7dIm\n572eCWwH/JukJREx32mTzYZtcGmTv9/mOaNMmzzWcHsunl/Uqm85o5pddBiKVk9MPBZoY16JqpiN\nwlxKzS5a8QAvcprsfOAnwO6SHpJ0zOCrZTZF9OE02SAVGUU/ahgVMZuS/HNRsxqreARVvHpmFeec\nbGY1VvEIqnj1zCqu4hFU8eqZVVzFI6ji1TOrtvAoull9bah4BFW8embV5gA3q7G1szYruOULA61H\nOw5wsxI2zKz2QfiUD3Cd9PRIyo3fKvDbmz7TsuHP3mqdbSh5raqkc4HDgNUR8aa87s3AP5F+Fz7+\nu++f5sdOBo4lZWQ9ISIWd9q/ky6albCemYWWDhYB72pa90VgQUTsAywA/h5A0p68lDZ5PnB2i8yr\nEzjAzUrYwCaFlnYi4lrgyabVG4Gt8u2teSnv2uHktMkRsRwYT5vc1pTvopuNUtkuehsfAy6X9CVS\nHoa35fWzgesbtiufNtnM2tvAzELLJP0F6fh6Z1Kw/2uv9XMLblbCWlqfJrtp7HluGnu+190eHREn\nAETERZLOyev7nzbZzNprd3z92/O25Lfnbfni/bMWdjzb05wSbaWkgyLiGknvIB1rg9Mmmw1XH06T\nnU9KZritpIdIo+YfBr4iaSbwPPARAKdNNhuysgHeISXa77TZfpRpk82mly7nuEfOAW5WQqdz3FVQ\nJG3yjpKuknSHpKWSjh9GxcymggGdJuubIl8/64ETI2JJnub0Z5IWR8SyAdfNrPJeaHOarCqK5EVf\nBazKt5+TdBdpiN4BbtNerY7BJc0F9gZuHERlzKaaqh+DF65d7p5fRLqE7rnWW4013J6LJx+06ltO\nmckHR3l8XUShAJe0CSm4vxERl7Tfcl5fKmU2PHMpM/lgLQKcdLH7nRHx5UFWxmyqmfLH4JIOAD4I\nLJV0KxDAKRHxo0FXzqzqXqj43EVFRtGvo/JzKJqNRl266GbWwpTvoptZe7U5TWZmL1f1LrpTNpmV\nUPZadEnnSlot6baGdQskPSzplrwc2vDYyZLulXSXpEO61c8tuFkJfWjBFwFnAuc1rT89Ik5vXCFp\nD15Km7wjcKWkN3ZK+uAANythbcnTZBFxraQ5LR5qle/8veS0ycBySeNpk9teOu4uulkJA/y56Ecl\nLZF0jqTxHOmzgRUN2zhtstkgDSjAzwZ2jYi9Sb/k/FKv9XMX3ayEdufBHxhbwQNjK1o+1k1EPNpw\n92vApfm20yabDVO78+A7z9uFneft8uL9qxde33K7bELaZEmvy3kYAN4H3J5vT8e0yad332QARjHT\nZ9y/cOhlAmjX3lqi8i7tvsmIDSht8sGS9ibNUbYc+N/gtMlmQzegtMmLOmzvtMlmw9Ju6qKqcICb\nleBr0c1qrOrXojvAzUpwgJvVmH8PblZjPgY3qzF30c1qbMpPXSRpFvBjYLO8/UURMZpLqswqZsof\ng0fEWkkHR8QaSTOB6yT9MCI6XgNrNh3U4hg8Itbkm7Pyczpe/2o2XdTiGFzSDOBnwBuAsyLi5oHW\nymyKqEWAR8RGYB9JrwK+L2nPiLhzsFUzq74pfwzeKCKekXQ1cCjpJ2tNxhpuz8Wzi1r13ZOX3kz5\nY3BJ2wHrIuJpSZsD7wS+0Hrref2sm9kQ7J6XcZdN6tllT5NJOhc4DFgdEW/K674I/D6wFrgPOCYi\nnsmPnQwcC6wnTeW9uNP+i+Rkez1wtaQlpOyNl0fE5P4LZjW1npmFlg4WAe9qWrcY2CvnZLsXOBlA\n0p68lDZ5PnC2pFbZV19U5DTZUmDfbtuZTUdlu+it0iZHxJUNd28A/ijfPpxJpk2u9gGEWcUNYRT9\nWODb+fZsoDG5W9e0yQ5wsxLaBfizY7fw7NitpfYt6ZOk8a9vd924DQe4WQntAnyLeW9hi3lvefH+\nIwvbpllrSdKHgHcDv9ew2mmTzYap7NRFWXPa5EOBvwYOjIi1DdtNx7TJZqMzoLTJp5B+3HVFHiS/\nISKOc9pksyFz2mSzGqvVpapmNtGUv1TVzNqrxa/JzKw1B3htHTD0ErXr8Cc8BLg+DhtJufvr30ZQ\n6nGT2nrtC1M8J5uZtbdhfbVDqNq1M6u4DevdRTerLQe4WY2tX+cAN6utjRuqHULVrp1Z1bmLblZj\nz1c7hKpdO7OqWz/qCnRWJOmimbWzvuDShqQTJC3Ny/F53TaSFku6W9LlkrbqtXoOcLMySgS4pL2A\nPwN+B9gbOEzSG4BPAFdGxG8CV5GzqvaicIBLmiHpFkk/6LUws9pZV3BpbQ/gxohYGxEbSLP4vo+U\nPfXreZuvA3/Qa/Um04KfQMvZTMymsQ0Fl9ZuB96eu+RbkHKw7QRsHxGrASJiFfDaXqtXKMAl7ZgL\nP6fXgsxqqUQXPSKWAacBV5CmVLmV1l8HPc/mW3QU/QxSErieD/bNaun5Nut/Pga3jXV9ekQsIqdo\nkvRZYAWwWtL2EbFa0uuAX/VavSJzk72HNG/SEknzaMj+aDbttRsh32teWsZ9c2HLzSS9JiIelbQz\n8IfA/wB2AT5Eat2PBi7ptXpFWvADgMMlvRvYHHilpPMi4k9fvulYw+25eHZRq77rmThZyCSVPw/+\nPUmv5qUsqc9IOg24UNKxwIOk+ch6UmRuslNIaVyRdBDwV62DGzy7qE09++dl3BmTe3rJAI+IA1us\newL4n+X2nPhKNrMy2p8Cq4RJBXhEXANcM6C6mE097U+BVYJbcLMyKn4tugPcrIx2p8kqwgFuVoZb\ncLMac4Cb1ZgD3KzG6nSazMya+DSZWY15FN2sxnwMblZjPgavq+tGUObmIygT9tfnR1Lu8th36GXO\nneyPoX0MblZjFe+iO6uqWRnl0yZvJem7ku6SdIektzptsllVlMuqCvBl4LKI2AN4M7CMUaRNNrMW\n1hZcWpD0KuDtOS8bEbE+Ip4G3ssI0iabWbNyXfRdgMckLcpzDvxLTp883LTJZtZGuS76JsC+wFkR\nsS/wa1L3vDlN8sDTJptZK+1Okz06Bo+NdXv2w8CKiPhpvv89UoAPL22ymXXQrvu9zby0jFv28rTJ\nOYBXSNo9Iu4B3gHckZcPMaS0yWbWTvnz4McD35K0KXA/cAwwk2GlTTazDkpeqhoRPwfe0uKh4aVN\nlrQceBrYCKyLiP36UbjZlNfmFFhVFG3BNwLzIuLJQVbGbMqp+KWqRQNc+JSa2ctV/NdkRYM2gCsk\n3Szpw4OskNmUUm5+8IEr2oIfEBGPSHoNKdDviohrB1kxsymhDl30iHgk/31U0sXAfkCLAB9ruD0X\nzy5qVXf92AvcMFain13xAFdE56vg8rWxMyLiOUm/ASwGFkbE4qbtAhYMrqbGqBI+wJtGUuryOHro\nZc7Vo0REobQPkoLdCl5F+gsV3m8/FWnBtwcuTgHMJsC3moPbbNqa6qfJIuIBYO8h1MVs6ql4F91X\nspmVUfHTZA5wszKcdNGsxtxFN6sxB7hZjVX8GNzXl5uVUSInm6RZkm6UdKukpZIW5PV1Spu83OUO\nxX0jKPO2EZSZrk6bCiJiLXBwROxDOhU9X9J+1Ctt8nKXOxT3j6DM0QR4qUtPhywi1uSbs0iHzIHT\nJpvVg6QZkm4FVgFXRMTN9DFtsgfZzEop11uIiI3APnkShIsl7UUf0yZ3/bFJ4R2la9XNprxJ/diE\nNW0e/XFexn2u634l/S1ph39OyqA0njb56jy10aT1LcDNppsU4E8X3HqrlwW4pO1IOQ6flrQ5cDnw\nBeAg4ImIOE3SScA2EfGJXuroLrpZKf9V5smvB74uaQZpPOw7EXGZpBvoU9pkt+BmPUot+IqCW+9U\n2d+Dm1lb1b5W1QFuVkq1z7k7wM1KcQtuVmNuwc1qrNQo+sA5wM1KcRfdrMbcRTerMbfgZjXmFtys\nxtyCm9WYW3CzGvNpMrMacwtuVmPVPgZ3TjazUtYVXFqTdKikZZLuyckd+sotuFkpvbfgOdHDV4F3\nAL8EbpZ0SUQs61PlHOBm5ZQ6Bt8PuDciHgSQdAEpZbID3KwaSh2Dz2ZiSpiHSUHfNw5ws1J8msys\nrh6EU+cU3HZ1i3UrgZ0b7u+Y1/WNky6ajYikmcDdpEG2R4CbgCMj4q5+leEW3GxEImKDpI8Ci0mn\nrM/tZ3CDW3CzWvOFLmY15gA3qzEHuFmNOcDNaswBblZjDnCzGnOAm9WYA9ysxv4/AA2FyxG6yD0A\nAAAASUVORK5CYII=\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0xcdd6320>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import pylab as pl\n",
    "pl.matshow(test_cm)\n",
    "pl.title('Confusion matrix for test data')\n",
    "pl.colorbar()\n",
    "pl.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 计算一下其他的对预测效果的评估指标\n",
    "# 详细内容请看 [6],[7],[8],[9]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accuracy = 0.900337\n"
     ]
    }
   ],
   "source": [
    "# Accuracy\n",
    "print(\"Accuracy = %f\" %(skm.accuracy_score(test_truth,test_pred)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Precision = 0.902996\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Anaconda2\\lib\\site-packages\\sklearn\\metrics\\classification.py:1203: DeprecationWarning: The default `weighted` averaging is deprecated, and from version 0.18, use of precision, recall or F-score with multiclass or multilabel data or pos_label=None will result in an exception. Please set an explicit value for `average`, one of (None, 'micro', 'macro', 'weighted', 'samples'). In cross validation use, for instance, scoring=\"f1_weighted\" instead of scoring=\"f1\".\n",
      "  sample_weight=sample_weight)\n"
     ]
    }
   ],
   "source": [
    "# Precision\n",
    "print(\"Precision = %f\" %(skm.precision_score(test_truth,test_pred)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Recall = 0.900337\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Anaconda2\\lib\\site-packages\\sklearn\\metrics\\classification.py:1304: DeprecationWarning: The default `weighted` averaging is deprecated, and from version 0.18, use of precision, recall or F-score with multiclass or multilabel data or pos_label=None will result in an exception. Please set an explicit value for `average`, one of (None, 'micro', 'macro', 'weighted', 'samples'). In cross validation use, for instance, scoring=\"f1_weighted\" instead of scoring=\"f1\".\n",
      "  sample_weight=sample_weight)\n"
     ]
    }
   ],
   "source": [
    "# Recall\n",
    "print(\"Recall = %f\" %(skm.recall_score(test_truth,test_pred)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "F1 score = 0.900621\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Anaconda2\\lib\\site-packages\\sklearn\\metrics\\classification.py:756: DeprecationWarning: The default `weighted` averaging is deprecated, and from version 0.18, use of precision, recall or F-score with multiclass or multilabel data or pos_label=None will result in an exception. Please set an explicit value for `average`, one of (None, 'micro', 'macro', 'weighted', 'samples'). In cross validation use, for instance, scoring=\"f1_weighted\" instead of scoring=\"f1\".\n",
      "  sample_weight=sample_weight)\n"
     ]
    }
   ],
   "source": [
    "# F1 Score\n",
    "print(\"F1 score = %f\" %(skm.f1_score(test_truth,test_pred)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 下面是一些参考资料\n",
    "\n",
    "[1] Original dataset as R data https://spark-public.s3.amazonaws.com/dataanalysis/samsungData.rda  \n",
    "[2] Human Activity Recognition Using Smartphones http://archive.ics.uci.edu/ml/datasets/Human+Activity+Recognition+Using+Smartphones  \n",
    "[3] Android Developer Reference http://developer.android.com/reference/android/hardware/Sensor.html  \n",
    "[4] Random Forests http://en.wikipedia.org/wiki/Random_forest  \n",
    "[5] Confusion matrix http://en.wikipedia.org/wiki/Confusion_matrix\n",
    "[6] Mean Accuracy http://ieeexplore.ieee.org/xpl/login.jsp?tp=&arnumber=1054102&url=http%3A%2F%2Fieeexplore.ieee.org%2Fxpls%2Fabs_all.jsp%3Farnumber%3D1054102\n",
    "\n",
    "[7] Precision http://en.wikipedia.org/wiki/Precision_and_recall\n",
    "[8] Recall http://en.wikipedia.org/wiki/Precision_and_recall\n",
    "[9] F Measure http://en.wikipedia.org/wiki/Precision_and_recall"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
